# @shuaige : 陈世玉
# @name :SpiderCollage.py
# @time :2024/12/6 14:03
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Scrape the ranking table from the ShanghaiRanking ARWU 2023 page, one page
# at a time, and dump the first six columns of every row to data.txt as
# comma-separated lines (also echoed to stdout).

# Location of the ChromeDriver executable, relative to the working directory.
path = 'chromedriver.exe'
url = 'https://www.shanghairanking.cn/rankings/arwu/2023'
# Number of result pages to walk through.
page_count = 10
# One XPath per exported table column, in output order. Presumably these are
# world rank, name, region, regional rank, total score and alumni score --
# verify against the live page layout.
column_xpaths = ['//td[1]', '//td[2]', '//td[3]', '//td[4]', '//td[5]', '//td[6]']

# Selenium 4 takes the driver path through a Service object; passing it as
# the first positional argument no longer works in current releases.
browser = webdriver.Chrome(service=Service(path))
try:
    browser.get(url)
    with open('data.txt', 'w', encoding='utf-8') as f:
        for i in range(1, page_count + 1):
            print("正在爬取第"+str(i)+"页")

            # Collect every cell of each column on the current page. The
            # find_elements_by_* helpers were removed in Selenium 4.3, so use
            # the locator-based API instead.
            columns = [browser.find_elements(By.XPATH, xpath) for xpath in column_xpaths]

            # zip() stitches the per-column lists back into rows.
            for cells in zip(*columns):
                # Each .text access is a round-trip to the driver, so read
                # every cell once and reuse the value for both outputs.
                texts = [cell.text for cell in cells]
                print(*texts)
                f.write(','.join(texts) + '\n')

            # Nothing is read after the last page, so do not navigate further.
            if i == page_count:
                break

            # Scroll to the bottom so the pagination control is in view.
            browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
            time.sleep(2)

            # Explicitly wait until the "next page" button is clickable.
            next_button = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".ant-pagination-next"))
            )

            # Go to the next page and give the table time to re-render.
            next_button.click()
            time.sleep(2)
finally:
    # Always shut the browser down, even if scraping failed part-way through,
    # so no Chrome/chromedriver process is left behind.
    browser.quit()
